Introduction

Benchmarking spatial analysis tools is tricky, especially when few ground truth data sets exist. Instead, we decided to use simulated data, since this gives us a strong ground truth. We can also control the parameters used to create the data sets in order to simulate a variety of potential biological situations.

We wrapped our simulation in an R package - Oneiric - to facilitate the reproducibility and replicability of our work.

library(oneiric)
library(scater)
library(ggplot2)
library(RColorBrewer)
set.seed(288)

Data

Oneiric provides multiple territory structures - one of them being based on a tinkerbell progression chaos map. We provide a set of parameters that will produce usable chaos maps.

It is also possible to search for other parameters using the find_tinkerbell function.

Chaos Parameters

# Load the data bundled with the oneiric package, then print the `map_params`
# matrix (presumably made available by the data() call -- confirm).
data(oneiric)
map_params
##                a          b           c          d         x_0         y_0
##  [1,] -1.4524975  1.2802377 -0.89194171  2.4186510  0.45164818 -0.11938045
##  [2,] -0.9833781  0.7158569 -2.22051348  0.5401524  0.15201547  0.70978265
##  [3,]  1.5359136 -0.5865262  0.46922684  1.9351586 -0.80089519 -0.25490827
##  [4,] -0.9097808 -0.8748624  2.27694361  0.4794840  0.29575882 -0.73100054
##  [5,]  2.6128295  1.2076972 -0.18903700  3.6309333 -1.35852391  0.52502747
##  [6,] -2.2279360  1.7791016 -1.20597457  1.0571294 -0.57339884  1.63350853
##  [7,] -0.8282720  1.4754709 -0.38121196 -0.8253620  1.16853433  0.08396288
##  [8,] -1.3826889  0.4116210 -1.78965910  1.3498485  0.88184490  0.56031956
##  [9,] -2.4988707 -2.8057423  0.07558119 -0.2581050  0.48210929 -1.78630243
## [10,] -0.9108967  4.3691958 -0.10709005  2.5931729 -1.00821722 -0.38977567
## [11,] -2.0506795  1.1691831 -1.40869336  1.2404386 -0.61605783  1.97442209
## [12,] -1.7135684 -0.1199584  0.48353594  1.2159022  1.18212102 -0.66240489
## [13,] -1.3700543 -0.6228237  0.04649359  1.4258482 -1.00970676  1.28502390
## [14,] -2.5149806 -4.2115682  1.35684939  1.6831248  0.05273502 -0.12617619
## [15,]  2.0762965 -0.2755917 -0.76867673  1.2246233 -0.90646219  0.54479486
## [16,] -0.5116489 -1.0998629 -0.75110047  0.5155583  0.05990936 -0.26377246
## [17,] -4.7771668  4.3060377 -1.50604446 -0.2426992  0.94797294  2.61778723
## [18,] -2.3034036 -0.2044303 -2.31393112  1.5616273 -0.05644338 -0.36995731
## [19,] -0.4071411  0.7486259 -0.41520969  1.2877298 -0.28991360  1.01219584
## [20,] -1.1977801  1.0634716 -0.52625141  1.1745420 -0.83615997 -0.83545511
## [21,] -1.1890503 -1.6608322  3.49584000  1.1665141  0.73463956 -1.78704136
## [22,] -1.1512296 -1.7267131  0.38683127 -0.7195584  0.75264573 -0.65461937
## [23,] -1.0600826 -0.9489989  0.08094950 -0.8479636  1.58579695 -0.06424196
## [24,]  1.1496231  1.0198538 -1.92018938 -0.3317371 -0.53936695  0.59072867
## [25,]  0.2651844  2.4037982  0.49374343 -0.2448217  0.32016818 -0.33049077
## [26,]  1.6048797  0.2167301  0.44938604  2.4042743 -0.11351876 -0.07733417
## [27,] -0.1355331 -1.0296461  1.09688996  0.8760724 -0.29289105 -0.93654977
## [28,]  2.1461147 -4.5485150  0.02623263  0.7284910 -0.97357690 -0.13225379
## [29,]  0.9000000 -0.6013000  2.00000000  0.5000000 -0.72000000 -0.64000000
## [30,]  0.5535363  2.2079160  0.36159485  1.8782757 -0.73090825 -0.16021286
## [31,]  0.5579728 -0.5012534  3.53268415 -0.7956229 -0.14021484 -0.51037642
## [32,] -1.4025020 -0.4331542 -0.25024455  0.2950250 -0.13910644 -1.23359252
## [33,]  3.3379829 -2.3800425  0.43428288  3.0860127 -1.23075426 -1.92820430
## [34,]  1.1540912 -1.7535453  0.08302577  0.8090087 -0.49286072 -0.17190885
## [35,] -2.1114611  1.9888498 -0.64616627  0.7607303  0.39476142  0.41183028
## [36,] -1.6844748  1.2920278  1.96527333  0.4804940  0.06424053  0.66946574
# Search for other tinkerbell parameter sets with find_tinkerbell().
# NOTE(review): this reassigns `map_params`, replacing the matrix printed
# earlier -- confirm that is intended.
map_params <- find_tinkerbell(
  time = 120,
  n_maps = 96,
  plot = FALSE,
  export = FALSE,
  file_name = "maps"
)

Preparing output directory

# Directory that receives the exported simulation files.
# The original location stays the default; set the ONEIRIC_OUTPUT_DIR
# environment variable to redirect the exports on another machine without
# editing this script.
default_output <- "/Users/martinp4/Documents/Cedars/Oneiric/simulations/"
output <- Sys.getenv("ONEIRIC_OUTPUT_DIR", unset = NA)
if (is.na(output) || !nzchar(output)) {
  # Variable unset or empty: fall back to the original hard-coded path.
  output <- default_output
}
# Create the directory up front so the exports further down do not depend on
# it already existing.
if (!dir.exists(output)) {
  dir.create(output, recursive = TRUE)
}

Territory Types

Here, we demonstrate the possible territory types that can be created using oneiric.

# NOTE: the four calls are kept in their original order, since each one
# presumably draws from the random number stream seeded earlier.

# "circle" pattern.
circle <- simulate_spatial(
  n_cells = 5000,
  n_territories = 5,
  n_samples = 12,
  pattern = "circle",
  expanse = c(0.1, 0.3)
)

# "rod" pattern, described by a width range and a length range.
rod <- simulate_spatial(
  n_cells = 5000,
  n_territories = 5,
  n_samples = 12,
  pattern = "rod",
  width_range = c(0, 0.1),
  length_range = c(0.2, 0.5)
)

# "chaos" pattern (no n_territories is passed for this one).
chaos_map <- simulate_spatial(
  n_cells = 5000,
  n_samples = 12,
  pattern = "chaos",
  expanse = 0.02
)

# Single "circle" territory requested with layers = 5.
layered_map <- simulate_spatial(
  n_cells = 5000,
  n_samples = 12,
  n_territories = 1,
  pattern = "circle",
  layers = 5,
  expanse = c(0.4, 0.5)
)
# The same bind / palette / plot recipe is applied to every territory type,
# so it is written once as three small helpers instead of four copies.

# Row-bind a list of data frames and turn the Territory column into a factor.
bind_territory_samples <- function(samples) {
  cells <- do.call("rbind", samples)
  cells$Territory <- as.factor(cells$Territory)
  cells
}

# Interpolate the 11-colour 'Spectral' palette to one colour per factor level.
spectral_palette <- function(groups) {
  ramp <- colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"))
  ramp(length(levels(groups)))
}

# Scatter plot of x/y positions coloured by Territory, faceted by sample.
# `cols` is the vector of colours handed to the manual colour scale.
plot_territory_map <- function(cells, cols) {
  ggplot(cells, aes(x = x, y = y, col = Territory)) +
    geom_point(size = 0.5) +
    theme_bw() +
    scale_color_manual(values = cols) +
    facet_wrap(~sample) +
    # Enlarge the legend keys; the plotted points themselves are small.
    guides(colour = guide_legend(
      override.aes = list(size = 5)))
}

# Circle territories
circles <- bind_territory_samples(circle)
cols <- spectral_palette(circles$Territory)
g <- plot_territory_map(circles, cols)

# Rod territories
rods <- bind_territory_samples(rod)
cols <- spectral_palette(rods$Territory)
g1 <- plot_territory_map(rods, cols)

# Chaos map territories
tinker <- bind_territory_samples(chaos_map)
cols <- spectral_palette(tinker$Territory)
g2 <- plot_territory_map(tinker, cols)

# Layered territories
layers <- bind_territory_samples(layered_map)
cols <- spectral_palette(layers$Territory)
g3 <- plot_territory_map(layers, cols)
# Draw the circle-territory plot (points coloured by Territory).
print(g)
Circle Territories

Circle Territories

# Draw the rod-territory plot (points coloured by Territory).
print(g1)
Rod Territories

Rod Territories

# Draw the chaos-map territory plot (points coloured by Territory).
print(g2)
Chaos Map Territories

Chaos Map Territories

# Draw the layered-territory plot (points coloured by Territory).
print(g3)
Layered Territories

Layered Territories

Creating Simulated Data Sets

This section covers how the data sets used for benchmarking Vesalius were produced. We produce data sets with no cell labels.

Simulating Circle data sets

# "circle" pattern for the benchmarking data set.
# force_cells is set here to make sure that we have common cell labels.
circular <- simulate_spatial(
  n_cells = 5000,
  n_territories = 5,
  n_samples = 12,
  pattern = "circle",
  expanse = c(0.1, 0.25),
  force_cells = 10
)
## Samples Produced: 1 out of 12        
Samples Produced: 2 out of 12        
Samples Produced: 3 out of 12        
Samples Produced: 4 out of 12        
Samples Produced: 5 out of 12        
Samples Produced: 6 out of 12        
Samples Produced: 7 out of 12        
Samples Produced: 8 out of 12        
Samples Produced: 9 out of 12        
Samples Produced: 10 out of 12        
Samples Produced: 11 out of 12        
Samples Produced: 12 out of 12        
# Simulate cells on top of the circular territories, requesting no labels.
circular_counts <- simulate_cells(
  circular,
  cell_composition = 2,
  no_label = TRUE
)

# Stack the list of spatial tables into one data frame for plotting.
circles <- do.call("rbind", circular_counts$spatial)
circles$Territory <- as.factor(circles$Territory)

# Plot 1: colour by Territory, one interpolated 'Spectral' colour per level.
cols <- colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"))(
  nlevels(circles$Territory)
)
g <- ggplot(circles, aes(x = x, y = y, colour = Territory)) +
  geom_point(size = 0.5) +
  theme_bw() +
  scale_colour_manual(values = cols) +
  facet_wrap(~sample) +
  guides(colour = guide_legend(override.aes = list(size = 5)))

# Plot 2: same layout, coloured by the cell_labels column instead.
circles$cell_labels <- as.factor(circles$cell_labels)
cols <- colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"))(
  nlevels(circles$cell_labels)
)
g1 <- ggplot(circles, aes(x = x, y = y, colour = cell_labels)) +
  geom_point(size = 0.5) +
  theme_bw() +
  scale_colour_manual(values = cols) +
  facet_wrap(~sample) +
  guides(colour = guide_legend(override.aes = list(size = 5)))

# Write the spatial tables and counts to the output directory.
export_simulation(
  spatial = circular_counts$spatial,
  cells = circular_counts$counts,
  out_dir = output,
  file_tag = "circle_spatial_territories"
)
## NULL
# Draw the simulated circle data coloured by the Territory column.
print(g)
No-label territories assume that no cell type information is provided. A single label is given to all cells in the Territory column.

No-label territories assume that no cell type information is provided. A single label is given to all cells in the Territory column.

# Draw the simulated circle data coloured by the cell_labels column.
print(g1)
No-label territories assume that no cell type information is provided. Ground truth cell type labels are still provided in the cell_labels column.

No-label territories assume that no cell type information is provided. Ground truth cell type labels are still provided in the cell_labels column.

Circle Layers - NO Labels

# Single "circle" territory split into layers, for the no-label layered
# benchmark.
# The argument is spelled `layers`, as in the earlier layered_map call, rather
# than `layer`: relying on partial argument matching is fragile, and the value
# would be silently dropped if the function collects extra arguments in `...`.
circular_layer_nl <- simulate_spatial(
  n_cells = 5000,
  n_territories = 1,
  n_samples = 12,
  pattern = "circle",
  expanse = c(0.4, 0.5),
  layers = 5,
  force_cells = 12
)
## Samples Produced: 1 out of 12        
Samples Produced: 2 out of 12        
Samples Produced: 3 out of 12        
Samples Produced: 4 out of 12        
Samples Produced: 5 out of 12        
Samples Produced: 6 out of 12        
Samples Produced: 7 out of 12        
Samples Produced: 8 out of 12        
Samples Produced: 9 out of 12        
Samples Produced: 10 out of 12        
Samples Produced: 11 out of 12        
Samples Produced: 12 out of 12        
# Simulate cells on top of the layered circle territory.
circular_layer_counts_nl <- simulate_cells(
  circular_layer_nl,
  as_layer = TRUE,
  de_prob = 0.5,
  de_layer = 0.05,
  cell_composition = 2,
  no_label = TRUE # to mimic no cell type labels available
)

# Stack the list of spatial tables into one data frame for plotting.
circles <- do.call("rbind", circular_layer_counts_nl$spatial)
circles$Territory <- as.factor(circles$Territory)

# Plot 1: colour by Territory, one interpolated 'Spectral' colour per level.
cols <- colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"))(
  nlevels(circles$Territory)
)
g <- ggplot(circles, aes(x = x, y = y, colour = Territory)) +
  geom_point(size = 0.5) +
  theme_bw() +
  scale_colour_manual(values = cols) +
  facet_wrap(~sample) +
  guides(colour = guide_legend(override.aes = list(size = 5)))

# Plot 2: same layout, coloured by the cell_labels column instead.
circles$cell_labels <- as.factor(circles$cell_labels)
cols <- colorRampPalette(RColorBrewer::brewer.pal(11, "Spectral"))(
  nlevels(circles$cell_labels)
)
g1 <- ggplot(circles, aes(x = x, y = y, colour = cell_labels)) +
  geom_point(size = 0.5) +
  theme_bw() +
  scale_colour_manual(values = cols) +
  facet_wrap(~sample) +
  guides(colour = guide_legend(override.aes = list(size = 5)))

# Write the spatial tables and counts to the output directory.
export_simulation(
  spatial = circular_layer_counts_nl$spatial,
  cells = circular_layer_counts_nl$counts,
  out_dir = output,
  file_tag = "layered_spatial_territories"
)
## NULL
# Draw the simulated layered data coloured by the Territory column.
print(g)
No-label territories assume that no cell type information is provided. A single label is given to all cells in the Territory column.

No-label territories assume that no cell type information is provided. A single label is given to all cells in the Territory column.

# Draw the simulated layered data coloured by the cell_labels column.
print(g1)
No-label territories assume that no cell type information is provided. Ground truth cell type labels are still provided in the cell_labels column.

No-label territories assume that no cell type information is provided. Ground truth cell type labels are still provided in the cell_labels column.